package org.bjtu.word2vec;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.nio.channels.FileChannel;
import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Properties;

import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestMethod;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;
import org.springframework.web.multipart.MultipartHttpServletRequest;
import org.springframework.web.multipart.commons.CommonsMultipartResolver;

import cn.edu.bjtu.abstractimpl.analyzer.AnsjDocumentAnalyzer;
import cn.edu.bjtu.classimpl.parser.HiveWechatParser;
import cn.edu.bjtu.classimpl.parser.LineParser;
import cn.edu.bjtu.interfaces.document.IDocument;
import cn.edu.bjtu.interfaces.parser.Parser;
import cn.edu.bjtu.interfaces.segment.DocumentSegmentation;
import cn.edu.bjtu.interfaces.vector.IDocumentVector;
import cn.edu.bjtu.model.word2vec.Word2VEC;
import cn.edu.bjtu.model.word2vec.Word2VecForTransformingDocs;
import cn.edu.bjtu.model.word2vec.domain.Word2Vec;
import cn.edu.bjtu.tools.FileInfo;
import cn.edu.bjtu.tools.ModelInfo;
import cn.edu.bjtu.tools.ThreadLocalDateUtil;
import cn.edu.bjtu.tools.message.DocumentVectorMessage;
import cn.edu.bjtu.tools.message.FileInfoMessage;
import cn.edu.bjtu.tools.message.ModelInfoMessage;
import net.sf.json.JSONObject;

/**
 * REST controller for the Word2Vec model, mapped under {@code /model/}.
 *
 * <ul>
 * <li>{@code query/dataset} - lists the training files.</li>
 * <li>{@code get/build} - trains the model from the training directory and saves it.</li>
 * <li>{@code post/trans} - transforms a text passed as a request parameter.</li>
 * <li>{@code post/transfile} - transforms uploaded file(s) and answers with one JSON message.</li>
 * <li>{@code post/transfiles} - transforms the multi-file directory and answers with a vector file.</li>
 * <li>{@code post/transbigfile} - transforms uploaded file(s) line by line, one JSON message per line.</li>
 * <li>{@code query/exist} - reports whether the model file exists and can be loaded.</li>
 * </ul>
 *
 * Request handlers keep their temporary file paths in local variables: a
 * shared static path would be overwritten by concurrent requests.
 */
@RestController
@RequestMapping({ "/model/" })
public class ModelController extends BaseController {
	private static final Log LOG = LogFactory.getLog(ModelController.class);
	/** Charset name used for every file, upload stream and response body handled here. */
	private static final String UTF8 = "utf-8";
	/** Exclusive upper bound (100 MiB) for an upload accepted by {@code post/transfile}. */
	private static final long MAX_SINGLE_FILE_BYTES = 1024L * 1024 * 100;

	// Path of the model file.
	private static String RESOURCE_MODEL = "";
	// Path of the training-data directory.
	private static String TRAIN_FILE = "";
	// Value of the "transformfile" property as loaded by initpara(). The request
	// handlers do not read it; they build their own per-request paths.
	private static String TRNSFORMFILE = "";
	// Directory that receives the uploads of the multi-file transformation.
	private static String TRANSFORM_DIR = "";

	// Resolve the configured locations once, when the class is loaded.
	static {
		RESOURCE_MODEL = ApplicationConfig.getInstance().getProperty("word2vec_model_dir") + "word2vecModel";
		TRAIN_FILE = ApplicationConfig.getInstance().getProperty("word2vec_trainfile_dir");
		TRANSFORM_DIR = ApplicationConfig.getInstance().getProperty("transform_dir") + "multifiles";
		LOG.info(TRAIN_FILE);
	}

	/**
	 * Returns information about the training files (Word2Vec).
	 *
	 * @return a JSON {@code FileInfoMessage}: code 200 with the file list, or
	 *         code 400 when the training directory is missing, unreadable or
	 *         empty
	 */
	@RequestMapping(value = { "query/dataset" }, method = { RequestMethod.GET })
	public String getTrainFile() {
		File trainFileDir = new File(TRAIN_FILE);
		FileInfoMessage fileInfoMessage;
		if (!trainFileDir.exists()) {
			fileInfoMessage = new FileInfoMessage(400, "the train file dir isn't exist!");
			fileInfoMessage.setFileInfo(null);
			return toJson(fileInfoMessage);
		}
		// File.list() returns null when the path is not a directory or cannot
		// be read, so it must not be dereferenced blindly.
		String[] entries = trainFileDir.list();
		if (entries == null || entries.length == 0) {
			fileInfoMessage = new FileInfoMessage(400, "the train file is null");
			return toJson(fileInfoMessage);
		}
		List<FileInfo> fileInfo = super.getFileInfo(trainFileDir);
		fileInfoMessage = getReturnMessage(200, "", FileInfoMessage.class);
		fileInfoMessage.setFileInfo(fileInfo);
		return toJson(fileInfoMessage);
	}

	/**
	 * Trains the Word2Vec model from the training directory and saves it to
	 * the model path.
	 *
	 * @return a JSON {@code ModelInfoMessage}: code 200 with the elapsed
	 *         training plus saving time in milliseconds, 404 when the training
	 *         data is missing, 3 when learning fails, 2 when saving fails, or
	 *         500 when an I/O error interrupts the build
	 */
	@RequestMapping(value = ("get/build"))
	public String word2VecmModeling() {
		File trainFile = new File(TRAIN_FILE);
		if (!trainFile.exists()) {
			return toJson(new ModelInfoMessage(404, "trainfile isn't exist"));
		}
		Word2Vec learn = new Word2Vec();
		try {
			long learnStart = System.currentTimeMillis();
			boolean learned = learn.learnFile(trainFile);
			long learnMillis = System.currentTimeMillis() - learnStart;
			if (!learned) {
				return toJson(new ModelInfoMessage(3, "leaning trainFile is fail at " + TRAIN_FILE));
			}

			long saveStart = System.currentTimeMillis();
			boolean saved = learn.saveModel(new File(RESOURCE_MODEL));
			long saveMillis = System.currentTimeMillis() - saveStart;
			if (!saved) {
				return toJson(new ModelInfoMessage(2, "saving mode fail at" + RESOURCE_MODEL));
			}

			ModelInfoMessage modelInfoMessage = new ModelInfoMessage(200, "");
			ModelInfo modelInfo = new ModelInfo(trainFile.getName(), learnMillis + saveMillis, "file_only");
			modelInfoMessage.setModelInfo(modelInfo);
			return toJson(modelInfoMessage);
		} catch (IOException e) {
			// Report the failure instead of answering with an empty body.
			LOG.error("building the word2vec model failed", e);
			return toJson(new ModelInfoMessage(500, "building model fail: " + e.getMessage()));
		}
	}

	/**
	 * Transforms a text that is passed directly as a request parameter.
	 *
	 * @param text the document text; rejected when blank or longer than 8192
	 *             characters after trimming
	 * @return a JSON message: code 200 with the document vectors, 404 when the
	 *         model or the text is missing, -1 when the text is too long
	 */
	@RequestMapping(value = { "post/trans" }, method = { RequestMethod.POST, RequestMethod.GET })
	public String strToVec(@RequestParam(value = "text", required = true) String text) {
		if (!modelIsExist()) {
			return toJson(new ModelInfoMessage(404, "model isn't exist"));
		}
		int trimmedLength = text.trim().length();
		if (trimmedLength == 0) {
			return toJson(new DocumentVectorMessage(404, "The doc is null"));
		}
		if (trimmedLength > 8192) {
			return toJson(new DocumentVectorMessage(-1, "The url is to large"));
		}
		// Word segmentation tool used by the transformer.
		DocumentSegmentation docSeg = new AnsjDocumentAnalyzer();
		Word2VecForTransformingDocs doctran = Word2VecForTransformingDocs.getInstance(docSeg);
		DocumentVectorMessage documentVectorMessage = new DocumentVectorMessage(200, "success");
		List<IDocumentVector> documentVectors = doctran.transformDoc(text);
		documentVectorMessage.setData(documentVectors);
		return toJson(documentVectorMessage);
	}

	/**
	 * Transforms the uploaded file(s) of a single request and writes one JSON
	 * message with all document vectors to the response.
	 *
	 * The uploads are spooled into a temporary file which is always deleted
	 * before the method returns. Uploads of 100 MiB or more are rejected.
	 *
	 * @param req the (multipart) request carrying the uploads
	 * @param res the response the JSON message is written to
	 * @throws Exception when spooling the upload or writing the response fails
	 */
	@RequestMapping(value = { "post/transfile" }, method = RequestMethod.POST)
	public static void docToVec(HttpServletRequest req, HttpServletResponse res) throws Exception {
		if (!modelIsExist()) {
			OutPutStream(toJson(new ModelInfoMessage(404, "model isn't exist")), res);
			return;
		}
		// NOTE(review): the name is only as unique as the date format's
		// resolution; two requests in the same tick would share a file.
		String uploadPath = ApplicationConfig.getInstance().getProperty("transform_dir")
				+ ThreadLocalDateUtil.getDateFormat().format(new Date()) + ".txt";
		LOG.info(uploadPath);
		File file = new File(uploadPath);
		try {
			spoolUploads(req, file);
			if (!file.exists()) {
				OutPutStream(toJson(new FileInfoMessage(1, "file isn't exist!")), res);
				return;
			}
			if (!getFileSize(file)) {
				OutPutStream(toJson(new FileInfoMessage(1, "file is too large")), res);
				return;
			}
			DocumentSegmentation docSeg = new AnsjDocumentAnalyzer();
			Word2VecForTransformingDocs doctran = Word2VecForTransformingDocs.getInstance(docSeg);
			try {
				DocumentVectorMessage documentVectorMessage = new DocumentVectorMessage(200, "success");
				List<IDocumentVector> documentVectors = doctran.transform(doctran.getDoucument(uploadPath));
				documentVectorMessage.setData(documentVectors);
				OutPutStream(toJson(documentVectorMessage), res);
			} catch (IOException e) {
				LOG.error("transforming " + uploadPath + " failed", e);
			}
		} finally {
			// The spooled upload is temporary: remove it on every path.
			deleteIfExists(file);
		}
	}

	/**
	 * Multi-file transformation: spools the uploads into the multi-file
	 * directory, transforms everything found there, writes the vectors to a
	 * result file and streams that file to the client.
	 *
	 * @param req the (multipart) request carrying the uploads
	 * @param res the response the result file is streamed to
	 * @throws IOException when spooling, transforming or streaming fails
	 */
	@RequestMapping(value = { "post/transfiles" }, method = RequestMethod.POST)
	public static void docsToVecs(HttpServletRequest req, HttpServletResponse res) throws IOException {
		if (!modelIsExist()) {
			// Same answer as the other transform endpoints give without a model.
			OutPutStream(toJson(new ModelInfoMessage(404, "model isn't exist")), res);
			return;
		}
		File uploaded = null;
		try {
			if (isMultipartRequest(req)) {
				uploaded = new File(TRANSFORM_DIR + File.separator
						+ ThreadLocalDateUtil.getDateFormat().format(new Date()) + ".txt");
				spoolUploads(req, uploaded);
			}

			// NOTE(review): every file currently in TRANSFORM_DIR is
			// transformed, including uploads of requests running in parallel.
			File dir = new File(TRANSFORM_DIR);
			DocumentSegmentation docSeg = new AnsjDocumentAnalyzer();
			Word2VecForTransformingDocs doctran = Word2VecForTransformingDocs.getInstance(docSeg);
			List<IDocumentVector> vectors = null;
			if (dir.exists() && dir.isDirectory()) {
				vectors = doctran.transform(doctran.getDocuments(TRANSFORM_DIR));
			} else if (dir.exists() && dir.isFile()) {
				vectors = doctran.transform(doctran.getDoucument(TRANSFORM_DIR));
			}
			if (vectors == null) {
				// Nothing to transform: answer with an empty result file.
				vectors = new ArrayList<IDocumentVector>();
			}

			// NOTE(review): the result file is left on disk after streaming -
			// confirm whether anything else consumes it before deleting it.
			String resultPath = ApplicationConfig.getInstance().getProperty("transform_dir")
					+ ThreadLocalDateUtil.getDateFormat().format(new Date());
			writeToFile(vectors, resultPath);

			// Stream the result file to the client. available() is only an
			// estimate, so copy until end of stream instead of reading once.
			try (FileInputStream resultIn = new FileInputStream(resultPath)) {
				OutputStream toClient = res.getOutputStream();
				byte[] buffer = new byte[8192];
				int read;
				while ((read = resultIn.read(buffer)) != -1) {
					toClient.write(buffer, 0, read);
				}
				toClient.flush();
				toClient.close();
			}
		} finally {
			// Remove the spooled upload even on failure, otherwise the next
			// request would transform it again.
			deleteIfExists(uploaded);
		}
	}

	/**
	 * Transforms large uploaded file(s) line by line: every non-empty document
	 * is transformed on its own and written to the response as a separate
	 * JSON message, so the whole result never has to be held in memory.
	 *
	 * @param req the (multipart) request carrying the uploads
	 * @param res the response the JSON messages are written to
	 * @throws Exception when spooling the upload, parsing or transforming fails
	 */
	@RequestMapping(value = { "post/transbigfile" }, method = RequestMethod.POST)
	public static void bigDocToVec(HttpServletRequest req, HttpServletResponse res) throws Exception {
		if (!modelIsExist()) {
			OutPutStream(toJson(new ModelInfoMessage(404, "model isn't exist")), res);
			return;
		}
		String uploadPath = ApplicationConfig.getInstance().getProperty("transform_dir")
				+ ThreadLocalDateUtil.getDateFormat().format(new Date()) + ".txt";
		File file = new File(uploadPath);
		try {
			spoolUploads(req, file);
			if (!file.exists()) {
				OutPutStream(toJson(new FileInfoMessage(1, "file isn't exist!")), res);
				return;
			}
			DocumentSegmentation docSeg = new AnsjDocumentAnalyzer();
			Word2VecForTransformingDocs doctran = Word2VecForTransformingDocs.getInstance(docSeg);
			try (BufferedReader ber = new BufferedReader(new InputStreamReader(new FileInputStream(file), UTF8))) {
				String line;
				while ((line = ber.readLine()) != null) {
					// Lines with more than 8 tab-separated fields go to the
					// Hive/WeChat parser, all others to the plain line parser.
					Parser p;
					if (line.split("\t").length > 8) {
						p = new HiveWechatParser();
					} else {
						p = new LineParser();
					}
					IDocument document = p.parse(line);
					if (document.getContent().replaceAll(" ", "").length() > 0) {
						List<IDocumentVector> single = new ArrayList<IDocumentVector>();
						IDocumentVector docVec = doctran.transform(document);
						single.add(docVec);
						DocumentVectorMessage documentVectorMessage = new DocumentVectorMessage(200, "success");
						documentVectorMessage.setData(single);
						OutPutStream(toJson(documentVectorMessage), res);
					}
				}
			} catch (IOException e) {
				LOG.error("transforming " + uploadPath + " failed", e);
			}
		} finally {
			// The spooled upload is temporary: remove it on every path.
			deleteIfExists(file);
		}
	}

	/**
	 * Tells whether the model file exists.
	 *
	 * @return {@code true} when a file exists at the model path
	 */
	private static boolean modelIsExist() {
		File file = new File(RESOURCE_MODEL);
		LOG.info(file.getAbsolutePath());
		return file.exists();
	}

	/**
	 * Tells whether the request is a multipart (file upload) request.
	 *
	 * @param req the incoming request
	 * @return {@code true} when the request is multipart
	 */
	private static boolean isMultipartRequest(HttpServletRequest req) {
		CommonsMultipartResolver multipartResolver = new CommonsMultipartResolver(
				req.getSession().getServletContext());
		return multipartResolver.isMultipart(req);
	}

	/**
	 * Copies every uploaded file of the request, line by line, into
	 * {@code target}. The target is created (or truncated) even when the
	 * request carries no upload.
	 *
	 * @param req    the incoming request
	 * @param target the file that receives the concatenated uploads
	 * @throws IOException when reading an upload or writing the target fails
	 */
	private static void spoolUploads(HttpServletRequest req, File target) throws IOException {
		try (BufferedWriter writer = new BufferedWriter(
				new OutputStreamWriter(new FileOutputStream(target), UTF8))) {
			if (!isMultipartRequest(req)) {
				return;
			}
			MultipartHttpServletRequest multiRequest = (MultipartHttpServletRequest) req;
			Iterator<String> fileNames = multiRequest.getFileNames();
			while (fileNames.hasNext()) {
				MultipartFile upload = multiRequest.getFile(fileNames.next());
				if (upload == null) {
					continue;
				}
				// Close each upload stream as soon as it has been copied.
				try (BufferedReader reader = new BufferedReader(
						new InputStreamReader(upload.getInputStream(), UTF8))) {
					String line;
					while ((line = reader.readLine()) != null) {
						writer.write(line);
						writer.newLine();
					}
				}
			}
		}
	}

	/**
	 * Writes the vectors to {@code targetPath}, one document per line in the
	 * form {@code label 1:v1 2:v2 ...}.
	 *
	 * @param iter       the document vectors to write
	 * @param targetPath path of the file to create or overwrite
	 * @throws IOException when the file cannot be written
	 */
	private static void writeToFile(List<IDocumentVector> iter, String targetPath) throws IOException {
		try (BufferedWriter writer = new BufferedWriter(
				new OutputStreamWriter(new FileOutputStream(targetPath), UTF8))) {
			for (IDocumentVector doc : iter) {
				StringBuilder sb = new StringBuilder();
				sb.append(doc.getDocLabel() + " ");
				for (int i = 0; i < doc.getVector().size(); i++) {
					// Feature indices are 1-based in the output.
					sb.append((i + 1) + ":" + doc.getVector().get(i)).append(" ");
				}
				writer.write(sb.toString());
				writer.newLine();
			}
		}
	}

	/**
	 * Deletes the file when it exists and logs a warning when that fails.
	 *
	 * @param file the file to delete; {@code null} is ignored
	 */
	private static void deleteIfExists(File file) {
		if (file != null && file.exists() && !file.delete()) {
			LOG.warn("could not delete temporary file " + file.getAbsolutePath());
		}
	}

	/**
	 * Serializes a message bean to its JSON text.
	 *
	 * @param message the bean to serialize
	 * @return the JSON representation
	 */
	private static String toJson(Object message) {
		return JSONObject.fromObject(message).toString();
	}

	/**
	 * Reports whether the model exists.
	 *
	 * The paths are reloaded from {@code appConfig.properties} first.
	 *
	 * @return a JSON {@code ModelInfoMessage}: code 404 / {@code no_exist}
	 *         when the model file is missing, 200 / {@code file_mem} when it
	 *         was loaded, 20 / {@code file_only} when it exists but loading
	 *         reported failure
	 * @throws IOException when loading the model fails with an I/O error
	 */
	@RequestMapping(value = { "query/exist" })
	public String judgeModelExist() throws IOException {
		initpara();
		Word2VEC word2Vec = new Word2VEC();
		File file = new File(RESOURCE_MODEL);
		ModelInfoMessage modelInfoMessage;
		ModelInfo modelInfo;
		if (!file.exists()) {
			modelInfoMessage = new ModelInfoMessage(404, "");
			modelInfo = new ModelInfo("no_exist");
		} else if (word2Vec.loadJavaModel(RESOURCE_MODEL)) {
			modelInfoMessage = new ModelInfoMessage(200, "");
			modelInfo = new ModelInfo("file_mem");
		} else {
			modelInfoMessage = new ModelInfoMessage(20, "");
			modelInfo = new ModelInfo("file_only");
		}
		modelInfoMessage.setModelInfo(modelInfo);
		return toJson(modelInfoMessage);
	}

	/**
	 * Reloads the model, training and transform paths from
	 * {@code appConfig.properties} on the class path. A path keeps its current
	 * value when the file or the corresponding key is missing.
	 *
	 * @throws IOException when closing the configuration stream fails
	 */
	private void initpara() throws IOException {
		try (InputStream inStream = Word2VecForTransformingDocs.class.getClassLoader()
				.getResourceAsStream("appConfig.properties")) {
			if (inStream == null) {
				// getResourceAsStream returns null when the resource is absent.
				LOG.error("Loading config file is fault");
				return;
			}
			Properties prop = new Properties();
			try {
				prop.load(inStream);
			} catch (IOException e) {
				LOG.error("Loading config file is fault", e);
			}
			// Fall back to the current value so a missing key cannot null a path.
			RESOURCE_MODEL = prop.getProperty("modelpath", RESOURCE_MODEL);
			TRAIN_FILE = prop.getProperty("trainfile", TRAIN_FILE);
			TRNSFORMFILE = prop.getProperty("transformfile", TRNSFORMFILE);
		}
	}

	/**
	 * Writes {@code data} to the response body as UTF-8 bytes.
	 *
	 * @param data     the text to send
	 * @param response the response to write to
	 * @throws IOException when the response stream cannot be obtained or written
	 */
	private static void OutPutStream(String data, HttpServletResponse response) throws IOException {
		OutputStream outputStream = response.getOutputStream();
		// Declare the charset so the client decodes the bytes as UTF-8.
		response.setHeader("content-type", "text/html;charset=UTF-8");
		// Encode explicitly: getBytes() without a charset would use the
		// platform default encoding.
		byte[] dataByteArr = data.getBytes("UTF-8");
		outputStream.write(dataByteArr);
	}

	/**
	 * Checks that the file is small enough to be transformed in one go.
	 *
	 * @param file the file to check
	 * @return {@code true} when the file is smaller than 100 MiB
	 */
	private static boolean getFileSize(File file) {
		// File.length() needs no stream, so nothing is left open that could
		// block the later deletion of the file.
		return file.length() < MAX_SINGLE_FILE_BYTES;
	}
}
